library(data.table)
library(tidyverse)
library(rtracklayer)
library(DESeq2)
library(Biostrings)
library(Rsubread)
library(pheatmap)
library(gridExtra)
library(ggrepel)
library(CLIPanalyze)
library(ggplot2)
library(RColorBrewer)
library(mixOmics)
library(plotly)
Load the rda file from merging all peaks and filtered peaks from HVA/HVAK individual CLIPanalyze call. Counting was re-do using featureCount and DESeq2 were called to do differential analysis.
dir.create("PDF_figure/CLIP_DESeq_09282019_merged", showWarnings = FALSE)
load("../Merged_Analysis/merged_peak_analysis.rda")
plotMA(dds.genes,
main = "MA plot for all HEAP vs IC\n in genes outside peaks")
pdf("PDF_figure/CLIP_DESeq_09282019_merged/MAPlot_all_HEAPvIC_OutsidePeaks.pdf",
height = 4,
width = 5)
plotMA(dds.genes,
main = "MA plot for all HEAP vs IC\n in genes outside peaks")
dev.off()
## quartz_off_screen
## 2
plotMA(dds.peaks,
main = "MA plot for all HEAP vs IC in peaks,\none-sided test")
pdf("PDF_figure/CLIP_DESeq_09282019_merged/MAPlot_all_HEAPvIC_InPeaks.pdf",
height = 4,
width = 5)
plotMA(dds.peaks,
main = "MA plot for all HEAP vs IC in peaks,\none-sided test")
dev.off()
## quartz_off_screen
## 2
No peak filtering was done here as peak was filtred and selected in their individual analysis.
plotMA(hvak.hva.res,
main = "MA plot for HVAK over HVA\nin all peaks (significant over input)",
xlab = "Mean of normalized counts")
pdf("PDF_figure/CLIP_DESeq_09282019_merged/MAPlot_HVAKvHVA_peaks.pdf",
height = 4,
width = 5)
plotMA(hvak.hva.res,
main = "MA plot for HVAK over HVA\nin all peaks (significant over input)",
xlab = "Mean of normalized counts")
dev.off()
## quartz_off_screen
## 2
summary(hvak.hva.res)
##
## out of 7653 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 1760, 23%
## LFC < 0 (down) : 48, 0.63%
## outliers [1] : 40, 0.52%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
dds_transform <- varianceStabilizingTransformation(dds.peaks.hva.hvak)
mat.dist = as.data.frame(assay(dds_transform))
mat.dist = as.matrix(dist(t(mat.dist)))
mat.dist = mat.dist/max(mat.dist)
setwd("/Users/mizuhi/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_09282019/Analysis/CLIP_Analysis/Data Visualization")
png('Hierchical_Clustering_merged_peaks.png')
cim(mat.dist, symkey = FALSE, margins = c(6, 6))
suppressMessages(dev.off())
## quartz_off_screen
## 2
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Hierchical_Clustering_merged_peaks.pdf")
cim(mat.dist, symkey = FALSE, margins = c(6, 6))
dev.off()
## quartz_off_screen
## 2
Final output is following:
plotPCA(dds_transform, intgroup = "tumor", ntop = 500) +
geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-50,130) + ylim(-60,50)
pdf("PDF_figure/CLIP_DESeq_09282019_merged/PCA_merged_peaks.pdf",
height = 4,
width = 5)
plotPCA(dds_transform, intgroup = "tumor", ntop = 500) +
geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-50,130) + ylim(-60,50)
dev.off()
## quartz_off_screen
## 2
I would like to just do PCA on samples from HEAP-CLIP_05212019.
dds_05212019 <- dds_transform[, c(paste0("HVA",1:3), paste0("HVAK",1:3))]
plotPCA(dds_05212019, intgroup = "tumor", ntop = 500) +
geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-80,40) + ylim(-40,30)
pdf("PDF_figure/CLIP_DESeq_09282019_merged/PCA_merged_peaks_batch1.pdf",
height = 4,
width = 5)
plotPCA(dds_05212019, intgroup = "tumor", ntop = 500) +
geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-80,40) + ylim(-40,30)
dev.off()
## quartz_off_screen
## 2
I would like to just do PCA on samples from HEAP-CLIP_09282019.
dds_09282019 <- dds_transform[, c(paste0("HVA",4:6), paste0("HVAK",4:6))]
plotPCA(dds_09282019, intgroup = "tumor", ntop = 500) +
geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-75,130) + ylim(-40,40)
pdf("PDF_figure/CLIP_DESeq_09282019_merged/PCA_merged_peaks_batch2.pdf",
height = 4,
width = 5)
plotPCA(dds_09282019, intgroup = "tumor", ntop = 500) +
geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-75,130) + ylim(-40,40)
dev.off()
## quartz_off_screen
## 2
filtered.peaks$hvak.hva.padj <- as.numeric(NA)
filtered.peaks$hvak.hva.log2FC <- as.numeric(NA)
filtered.peaks[rownames(hvak.hva.res), ]$hvak.hva.padj <-
hvak.hva.res$padj
filtered.peaks[rownames(hvak.hva.res), ]$hvak.hva.log2FC <-
hvak.hva.res$log2FoldChange
Save the datasets
saveRDS(filtered.peaks, "Datafiles/merged-peaks-filtered-09282019.rds")
Map seed to peaks
mirna.info.family <- readRDS("mirna-info-family-seedmatches.rds")
assignMirToPeaks <- function(miRNA = mirs,
peaks = es.peaks.utr3,
database = mirna.info.family){
require(BSgenome)
require(CLIPanalyze)
require(Biostrings)
bsgenome <- load.bsgenome("mm10")
peaks.seq <- get.seqs(bsgenome, peaks)
peaks$seed.8mer <- as.character(NA)
peaks$seed.7m8 <- as.character(NA)
peaks$seed.7A1 <- as.character(NA)
peaks$seed.6mer <- as.character(NA)
for (i in 1:length(miRNA)){
mir <- miRNA[i]
#Prepare seed matches
mir.6m <- as.character(database[database$miR.family %in% mir, ]$seedmatch.6)
mir.7m8 <- as.character(database[database$miR.family %in% mir, ]$seedmatch.m8)
mir.7mA <- as.character(database[database$miR.family %in% mir, ]$seedmatch.A1)
mir.8m <- as.character(database[database$miR.family %in% mir, ]$seedmatch.8)
#Filter peaks with 6mer matches
match <- GRanges(vmatchPattern(mir.6m, peaks.seq))
#Go back to peaks and map the seed match and extend 1 nt on both directions
match.extend <- peaks[seqnames(match),]
match.strand <- as.logical(strand(match.extend) == "+")
start(match.extend[match.strand,]) <- start(match.extend[match.strand,]) + start(match[match.strand,]) - 2
match.extend[match.strand,] <- resize(match.extend[match.strand,], fix = "start", width = 8)
end(match.extend[!match.strand,]) <- end(match.extend[!match.strand,]) - start(match[!match.strand,]) + 2
match.extend[!match.strand,] <- resize(match.extend[!match.strand,], fix = "start", width = 8)
#Assign seed types to these matches
match.extend.seq <- get.seqs(bsgenome, match.extend)
match.extend.seq.df <- data.frame(Peaks = names(match.extend.seq),
Sequence = as.character(match.extend.seq))
for (j in 1:nrow(match.extend.seq.df)){
peak.name <- as.character(match.extend.seq.df[j, "Peaks"])
seq <- as.character(match.extend.seq.df[j, "Sequence"])
if (grepl(mir.8m, seq)){
peaks[peak.name, ]$seed.8mer <- ifelse(is.na(peaks[peak.name, ]$seed.8mer),
mir,
paste(peaks[peak.name, ]$seed.8mer, mir, sep = ", "))
} else if (grepl(mir.7m8, seq)){
peaks[peak.name, ]$seed.7m8 <- ifelse(is.na(peaks[peak.name, ]$seed.7m8),
mir,
paste(peaks[peak.name, ]$seed.7m8, mir, sep = ", "))
} else if (grepl(mir.7mA, seq)){
peaks[peak.name, ]$seed.7A1 <- ifelse(is.na(peaks[peak.name, ]$seed.7A1),
mir,
paste(peaks[peak.name, ]$seed.7A1, mir, sep = ", "))
} else {
peaks[peak.name, ]$seed.6mer <- ifelse(is.na(peaks[peak.name, ]$seed.6mer),
mir,
paste(peaks[peak.name, ]$seed.6mer, mir, sep = ", "))
}
}
}
return(peaks)
}
miRNAs with mean counts larger than 200 are selected and mapped to peaks
mirna.family.DGE <- readRDS("Datafiles/mirna-counts-deseq-by-family-09282019.rds")
mirs <- subset(mirna.family.DGE, baseMean > 200)
mirs <- mirs$miR.family
#peaks.mirs <- assignMirToPeaks(miRNA = mirs,
# peaks = filtered.peaks,
# database = mirna.info.family)
#saveRDS(peaks.mirs, "Datafiles/merged-peaks-mirs-200-09282019.rds")
peaks.mirs <- readRDS("Datafiles/merged-peaks-mirs-200-09282019.rds")
targetofmiR <- function(peaks.mir = brain.peaks.mirs,
miRNA = "",
sitetype = "8mer"){
peaks.mir.sub <- as.data.frame(peaks.mir[,c("hvak.hva.log2FC", "hvak.hva.padj",
"seed.8mer", "seed.7m8", "seed.7A1", "seed.6mer")])
peaks.seedmatch <- lapply(c("seed.8mer", "seed.7m8", "seed.7A1", "seed.6mer"),
function(seed){
map <- peaks.mir.sub[grepl(miRNA, peaks.mir.sub[,seed]),]
map <- rownames(map)
map
})
names(peaks.seedmatch) <- c("seed.8mer", "seed.7m8", "seed.7A1", "seed.6mer")
if (sitetype == "8mer"){
maps <- peaks.seedmatch[[1]]
} else if (sitetype == "7mer_above"){
maps <- unique(unlist(peaks.seedmatch[1:3]))
} else if (sitetype == "7mer"){
maps <- unique(unlist(peaks.seedmatch[2:3]))
} else if (sitetype == "6mer"){
maps <- peaks.seedmatch[[4]]
} else {
print("Please input site type as: 8mer, 7mer_above, 7mer or 6mer")
}
return(peaks.mir[maps])
}
mirs.peaks <- lapply(mirs,
function(mir){
targetofmiR(miRNA = mir,
peaks.mir = peaks.mirs,
sitetype = "7mer_above")
})
names(mirs.peaks) <- mirs
saveRDS(mirs.peaks, "Datafiles/miRNA-merged-peaks-list-09282019.rds")
lens <- as.data.frame(sapply(mirs.peaks, function(x) length(x)))
mirs.peaks.log2FC.median <- sapply(mirs.peaks,
function(list){
log2FCs <- list$hvak.hva.log2FC
return(median(log2FCs[!is.na(log2FCs)]))
})
mirs.peaks.log2FC.mean <- sapply(mirs.peaks,
function(list){
log2FCs <- list$hvak.hva.log2FC
return(mean(log2FCs[!is.na(log2FCs)]))
})
mirs.targets.log2FC <- cbind(as.data.frame(mirs.peaks.log2FC.median),
as.data.frame(mirs.peaks.log2FC.mean),
lens)
colnames(mirs.targets.log2FC) <- c("targets_log2FC_median","targets_log2FC_mean", "N")
mirs.targets.log2FC <- merge(mirs.targets.log2FC, mirna.family.DGE[,c("log2FoldChange", "padj")], by = 0)
colnames(mirs.targets.log2FC)[1] <- "miR.family"
Show any miRNA that has more than 20 peaks matched
df <- subset(mirs.targets.log2FC, N > 20)
p <- ggplot(df, aes(x = log2FoldChange, y = targets_log2FC_median, label = miR.family, size = N)) +
geom_point(colour = "#ECAC46", alpha = 0.6) +
#scale_color_manual(fill = "yellow") +
geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
xlab("miRNA expression Fold change log2 (HVAK / HVA)") +
ylab("Median of peak signal changes log2 (HVAK / HVA)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12, margin = margin(t = 10)),
axis.title.y = element_text(size=12, margin = margin(r = 10)),
axis.text = element_text(size=10),
axis.line.y = element_line(size = 0.5),
axis.line.x = element_line(size = 0.5),
axis.ticks.x = element_line(size = 0),
axis.ticks.y = element_line(size = 0.5),
plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p1 <- ggplotly(p)
p1
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Median_peak_signal_LFC.pdf",
width = 5,
height = 4)
p
dev.off()
## quartz_off_screen
## 2
p <- ggplot(df, aes(x = log2FoldChange, y = targets_log2FC_mean, label = miR.family, size = N)) +
geom_point(colour = "#7AEF64", alpha = 0.6) +
#scale_color_manual(fill = "yellow") +
geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
xlab("miRNA expression Fold change log2 (HVAK / HVA)") +
ylab("Mean of peak signal changes log2 (HVAK / HVA)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12, margin = margin(t = 10)),
axis.title.y = element_text(size=12, margin = margin(r = 10)),
axis.text = element_text(size=10),
axis.line.y = element_line(size = 0.5),
axis.line.x = element_line(size = 0.5),
axis.ticks.x = element_line(size = 0),
axis.ticks.y = element_line(size = 0.5),
plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p2 <- ggplotly(p)
p2
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Mean_peak_signal_LFC.pdf",
width = 5,
height = 4)
p
dev.off()
## quartz_off_screen
## 2
If we only look at the top 40 highly expressed miRNAs
mirna.family.DGE <- mirna.family.DGE[order(-mirna.family.DGE$baseMean),]
mirs.40 <- rownames(mirna.family.DGE)[1:40]
df.40 <- df[df$miR.family %in% mirs.40,]
p <- ggplot(df.40, aes(x = log2FoldChange, y = targets_log2FC_median, label = miR.family, size = N)) +
geom_point(colour = "#ECAC46", alpha = 0.6) +
#scale_color_manual(fill = "yellow") +
geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
xlab("Top 40 miRNA expression Fold change log2 (HVAK / HVA)") +
ylab("Median of peak signal changes log2 (HVAK / HVA)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12, margin = margin(t = 10)),
axis.title.y = element_text(size=12, margin = margin(r = 10)),
axis.text = element_text(size=10),
axis.line.y = element_line(size = 0.5),
axis.line.x = element_line(size = 0.5),
axis.ticks.x = element_line(size = 0),
axis.ticks.y = element_line(size = 0.5),
plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p3 <- ggplotly(p)
p3
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Median_peak_signal_LFC_top40_miRNA.pdf",
width = 5,
height = 4)
p
dev.off()
## quartz_off_screen
## 2
p <- ggplot(df.40, aes(x = log2FoldChange, y = targets_log2FC_mean, label = miR.family, size = N)) +
geom_point(colour = "#7AEF64", alpha = 0.6) +
#scale_color_manual(fill = "yellow") +
geom_hline(yintercept = 0, linetype = "dashed", size = 0.3) +
geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
xlab("Top 40 miRNA expression Fold change log2 (HVAK / HVA)") +
ylab("Mean of peak signal changes log2 (HVAK / HVA)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12, margin = margin(t = 10)),
axis.title.y = element_text(size=12, margin = margin(r = 10)),
axis.text = element_text(size=10),
axis.line.y = element_line(size = 0.5),
axis.line.x = element_line(size = 0.5),
axis.ticks.x = element_line(size = 0),
axis.ticks.y = element_line(size = 0.5),
plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p4 <- ggplotly(p)
p4
pdf("PDF_figure/CLIP_DESeq_09282019_merged/Mean_peak_signal_LFC_top40_miRNA.pdf",
width = 5,
height = 4)
p
dev.off()
## quartz_off_screen
## 2
Here are the top 10 miRNAs with the most peaks associated.
color.vec <- brewer.pal(name = "Spectral", n = 11)
df <- as.data.frame(filtered.peaks)
df$hvak.hva.logP <- -log10(as.numeric(df$hvak.hva.padj))
len <- sapply(mirs.peaks, function(x) length(x))
mirs.peaks <- mirs.peaks[order(-len)]
mirna <- names(mirs.peaks)
mirs.peaks.names <- lapply(mirs.peaks, function(x) names(x))
for (i in 1:10){
p <- ggplot() +
geom_point(data = df, aes(x = hvak.hva.log2FC, y = hvak.hva.logP), size = 1, alpha = 0.5, color = "grey80") +
geom_point(data = df[mirs.peaks.names[[mirna[i]]],], aes(x = hvak.hva.log2FC, y = hvak.hva.logP), size = 1, alpha = 0.7, color = "#7570B3") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", size = 0.3) +
geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", size = 0.3) +
xlab("Fold change log2 (HVAK / HVA)") +
ylab("-log10(FDR)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12, margin = margin(t = 10)),
axis.title.y = element_text(size=12, margin = margin(r = 10)),
axis.text = element_text(size=10),
axis.line.y = element_line(size = 0.5),
axis.line.x = element_line(size = 0.5),
axis.ticks.x = element_line(size = 0),
axis.ticks.y = element_line(size = 0.5),
plot.title = element_text(hjust = 0.5, size = 12, face = "bold")) +
#guides(fill = guide_legend(title = "miRNA family")) +
ggtitle(sprintf("%s targets", mirna[i ]))
print(p)
}
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
pdf("PDF_figure/CLIP_DESeq_09282019_merged/VolcanoPlot_Peak_by_miRNAFamily.pdf",
width = 5,
height = 4)
for (i in 1:10){
p <- ggplot() +
geom_point(data = df, aes(x = hvak.hva.log2FC, y = hvak.hva.logP), size = 1, alpha = 0.5, color = "grey80") +
geom_point(data = df[mirs.peaks.names[[mirna[i]]],], aes(x = hvak.hva.log2FC, y = hvak.hva.logP), size = 1, alpha = 0.7, color = "#7570B3") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", size = 0.3) +
geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", size = 0.3) +
xlab("Fold change log2 (HVAK / HVA)") +
ylab("-log10(FDR)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12, margin = margin(t = 10)),
axis.title.y = element_text(size=12, margin = margin(r = 10)),
axis.text = element_text(size=10),
axis.line.y = element_line(size = 0.5),
axis.line.x = element_line(size = 0.5),
axis.ticks.x = element_line(size = 0),
axis.ticks.y = element_line(size = 0.5),
plot.title = element_text(hjust = 0.5, size = 12, face = "bold")) +
#guides(fill = guide_legend(title = "miRNA family")) +
ggtitle(sprintf("%s targets", mirna[i ]))
print(p)
}
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2
# Plot all peak density
ggplot(df, aes(x = hvak.hva.log2FC, y = hvak.hva.logP)) +
geom_hex(bins = 40, color = "white", aes(fill = stat(log2(count)))) +
scale_fill_viridis_c() +
geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
xlab("Fold change log2 (HVAK / HVA)") +
ylab("-log10(FDR)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12, margin = margin(t = 10)),
axis.title.y = element_text(size=12, margin = margin(r = 10)),
axis.text = element_text(size=10),
axis.line.y = element_line(size = 0.5),
axis.line.x = element_line(size = 0.5),
axis.ticks.x = element_line(size = 0),
axis.ticks.y = element_line(size = 0.5),
plot.title = element_text(hjust = 0.5, size = 12, face = "bold")) +
#guides(fill = guide_legend(title = "miRNA family")) +
ggtitle("All peaks")
## Warning: Removed 117 rows containing non-finite values (stat_binhex).
# Number of peaks with LFC on either side
# LFC > 0
LFC <- df$hvak.hva.log2FC[!is.na(df$hvak.hva.log2FC)]
sum(LFC > 0)
## [1] 7074
sum(LFC > 0)/length(LFC) * 100
## [1] 92.43434
# LFC < 0
sum(LFC < 0)
## [1] 579
sum(LFC < 0)/length(LFC) * 100
## [1] 7.565661
pdf("PDF_figure/CLIP_DESeq_09282019_merged/VolcanoPlot_all_peaks.pdf",
width = 5,
height = 4)
# Plot density of peak points on teh volcano plot
ggplot(df, aes(x = hvak.hva.log2FC, y = hvak.hva.logP)) +
geom_hex(bins = 40, color = "white", aes(fill = stat(log2(count)))) +
scale_fill_viridis_c() +
geom_vline(xintercept = 0, linetype = "dashed", size = 0.3) +
xlab("Fold change log2 (HVAK / HVA)") +
ylab("-log10(FDR)") +
theme_bw() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12, margin = margin(t = 10)),
axis.title.y = element_text(size=12, margin = margin(r = 10)),
axis.text = element_text(size=10),
axis.line.y = element_line(size = 0.5),
axis.line.x = element_line(size = 0.5),
axis.ticks.x = element_line(size = 0),
axis.ticks.y = element_line(size = 0.5),
plot.title = element_text(hjust = 0.5, size = 12, face = "bold")) +
#guides(fill = guide_legend(title = "miRNA family")) +
ggtitle("All peaks")
## Warning: Removed 117 rows containing non-finite values (stat_binhex).
dev.off()
## quartz_off_screen
## 2
I just want a table of all top 40 miRNAs and the number of peaks that are associated with them with LFC (HVAK/HVA) > 0 and < 0.
peak.number <- as.data.frame(table(mirs.peaks[[1]]$hvak.hva.log2FC > 0))
peak.number <- t(peak.number[,-1])
colnames(peak.number) <- c("LFC(HVAK/HVA) < 0", "LFC(HVAK/HVA) > 0")
for (i in 2: 40) {
new.peak <- as.data.frame(table(mirs.peaks[[i]]$hvak.hva.log2FC > 0))
new.peak <- t(new.peak[,-1])
peak.number <- rbind(peak.number, new.peak)
}
rownames(peak.number) <- names(mirs.peaks)[1:40]
peak.number <- rbind(peak.number, c(sum(df$hvak.hva.log2FC > 0), sum(df$hvak.hva.log2FC < 0)))
rownames(peak.number)[dim(peak.number)[1]] <- "Total"
peak.number
## LFC(HVAK/HVA) < 0 LFC(HVAK/HVA) > 0
## let-7-5p/miR-98-5p 56 738
## miR-15-5p/16-5p/195-5p/322-5p/497-5p 10 468
## miR-29-3p 14 454
## miR-200-3p/429-3p 43 390
## miR-17-5p/20-5p/93-5p/106-5p 33 403
## miR-24-3p 16 371
## miR-194-5p 15 342
## miR-27-3p 7 292
## miR-196-5p 14 251
## miR-103-3p/107-3p 5 254
## miR-23-3p 10 239
## miR-26-5p 15 228
## miR-19-3p 13 218
## miR-25-3p/32-5p/92-3p/363-3p/367-3p 12 197
## miR-30-5p/384-5p 15 168
## miR-141-3p 12 170
## miR-484 5 174
## miR-130-3p/301-3p 14 153
## miR-22-3p 6 146
## miR-96-5p 15 137
## miR-181-5p 13 133
## miR-182-5p 17 124
## miR-148-3p/152-3p 16 123
## miR-7-5p 1 128
## miR-205-5p 11 117
## miR-34-5p/449-5p 9 105
## miR-132-3p/212-3p 5 99
## miR-30a-3p/30d-3p/30e-3p 4 97
## miR-31-5p 37 51
## miR-194-2-3p/6926-5p/7055-5p 6 81
## miR-423-5p 2 83
## miR-141-5p/6769b-3p 10 75
## miR-21ac-5p 7 75
## miR-192-5p/215-5p 9 72
## miR-139-5p 3 76
## miR-193-3p 1 75
## miR-185-5p 5 71
## miR-671-5p 6 65
## miR-135ab-5p 2 63
## miR-18-3p/7069-3p 3 61
## Total NA NA
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] plotly_4.10.0 mixOmics_6.16.3
## [3] lattice_0.20-45 MASS_7.3-54
## [5] RColorBrewer_1.1-2 CLIPanalyze_0.0.10
## [7] GenomicAlignments_1.28.0 Rsamtools_2.8.0
## [9] GenomicFeatures_1.44.2 AnnotationDbi_1.54.1
## [11] plyr_1.8.6 ggrepel_0.9.1
## [13] gridExtra_2.3 pheatmap_1.0.12
## [15] Rsubread_2.6.4 Biostrings_2.60.2
## [17] XVector_0.32.0 DESeq2_1.32.0
## [19] SummarizedExperiment_1.22.0 Biobase_2.52.0
## [21] MatrixGenerics_1.4.3 matrixStats_0.61.0
## [23] rtracklayer_1.52.1 GenomicRanges_1.44.0
## [25] GenomeInfoDb_1.28.4 IRanges_2.26.0
## [27] S4Vectors_0.30.2 BiocGenerics_0.38.0
## [29] forcats_0.5.1 stringr_1.4.0
## [31] dplyr_1.0.7 purrr_0.3.4
## [33] readr_2.1.0 tidyr_1.1.4
## [35] tibble_3.1.6 ggplot2_3.3.5
## [37] tidyverse_1.3.1 data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.4.0 BiocFileCache_2.0.0
## [4] igraph_1.2.9 lazyeval_0.2.2 splines_4.1.1
## [7] crosstalk_1.2.0 BiocParallel_1.26.2 digest_0.6.28
## [10] htmltools_0.5.2 fansi_0.5.0 magrittr_2.0.1
## [13] memoise_2.0.1 tzdb_0.2.0 annotate_1.70.0
## [16] modelr_0.1.8 rARPACK_0.11-0 prettyunits_1.1.1
## [19] colorspace_2.0-2 blob_1.2.2 rvest_1.0.2
## [22] rappdirs_0.3.3 haven_2.4.3 xfun_0.28
## [25] hexbin_1.28.2 crayon_1.4.2 RCurl_1.98-1.5
## [28] jsonlite_1.7.2 genefilter_1.74.1 survival_3.2-13
## [31] glue_1.5.0 gtable_0.3.0 zlibbioc_1.38.0
## [34] DelayedArray_0.18.0 scales_1.1.1 DBI_1.1.1
## [37] Rcpp_1.0.7 viridisLite_0.4.0 xtable_1.8-4
## [40] progress_1.2.2 bit_4.0.4 biosignals_0.1.0
## [43] htmlwidgets_1.5.4 httr_1.4.2 ellipsis_0.3.2
## [46] farver_2.1.0 pkgconfig_2.0.3 XML_3.99-0.8
## [49] dbplyr_2.1.1 locfit_1.5-9.4 utf8_1.2.2
## [52] tidyselect_1.1.1 labeling_0.4.2 rlang_0.4.12
## [55] reshape2_1.4.4 munsell_0.5.0 cellranger_1.1.0
## [58] tools_4.1.1 cachem_1.0.6 cli_3.1.0
## [61] generics_0.1.1 RSQLite_2.2.8 broom_0.7.10
## [64] evaluate_0.14 fastmap_1.1.0 yaml_2.2.1
## [67] knitr_1.36 bit64_4.0.5 fs_1.5.0
## [70] KEGGREST_1.32.0 xml2_1.3.2 biomaRt_2.48.3
## [73] compiler_4.1.1 rstudioapi_0.13 filelock_1.0.2
## [76] curl_4.3.2 png_0.1-7 reprex_2.0.1
## [79] geneplotter_1.70.0 stringi_1.7.5 highr_0.9
## [82] RSpectra_0.16-0 Matrix_1.3-4 vctrs_0.3.8
## [85] pillar_1.6.4 lifecycle_1.0.1 jquerylib_0.1.4
## [88] bitops_1.0-7 corpcor_1.6.10 R6_2.5.1
## [91] BiocIO_1.2.0 assertthat_0.2.1 rjson_0.2.20
## [94] withr_2.4.2 GenomeInfoDbData_1.2.6 hms_1.1.1
## [97] grid_4.1.1 rmarkdown_2.11 lubridate_1.8.0
## [100] ellipse_0.4.2 restfulr_0.0.13